In [1]:
import numpy as np
import matplotlib.pyplot as pl
import seaborn as sn
import emcee
%matplotlib inline

Let us generate a set of points that follow a straight line and add some Gaussian noise.
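
In other words, each datum is drawn as

$$ y_i = a x_i + b + \epsilon_i, \qquad \epsilon_i \sim \mathcal{N}(0, \sigma^2), $$

with the slope $a$, intercept $b$ and noise level $\sigma$ set in the next cell.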


In [44]:
nPoints = 6
a = 1.2         # true slope
b = 0.5         # true intercept
sigma = 0.2     # standard deviation of the added Gaussian noise
x = 3.0 * np.random.rand(nPoints)    # abscissas drawn uniformly in [0, 3)
x = np.sort(x)
y = a*x + b                          # noiseless straight line
yNoise = y + sigma*np.random.randn(nPoints)    # observed, noisy ordinates

In [45]:
f, ax = pl.subplots()
ax.plot(x, y)
ax.errorbar(x, yNoise, yerr=sigma, fmt='.')


Out[45]:
<Container object of 3 artists>

The best fit is obtained by maximizing the likelihood which, for independent Gaussian noise of known width, is equivalent to the least-squares solution.
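
For independent Gaussian noise of known width $\sigma$, the log-likelihood of the data given the parameters $(a, b)$ is

$$ \ln L(a, b) = -\sum_i \frac{(y_i - a x_i - b)^2}{2 \sigma^2} + \mathrm{const}, $$

so maximizing it over $(a, b)$ is exactly the least-squares problem that np.polyfit solves in the next cell.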


In [46]:
coef = np.polyfit(x, yNoise, 1)   # degree-1 polynomial fit: [slope, intercept]
print(coef)


[ 1.37737355  0.27798903]

In [47]:
f, ax = pl.subplots()
ax.errorbar(x, yNoise, yerr=sigma, fmt='.')
ax.plot(x, coef[0]*x + coef[1])


Out[47]:
[<matplotlib.lines.Line2D at 0xc3c5c50>]

But we also want to compute the uncertainties of the parameters and any correlation between them. For this reason, we sample the posterior (a flat prior times the likelihood above) with a Markov Chain Monte Carlo algorithm, using the emcee ensemble sampler imported at the top.
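
With a flat prior that confines both parameters to the interval $[-5, 5]$, the log-posterior is, up to an additive constant,

$$ \ln p(a, b \mid y) = \begin{cases} \ln L(a, b) & \text{if } -5 \le a, b \le 5, \\ -\infty & \text{otherwise,} \end{cases} $$

which is exactly what the logPosterior method of the class below returns.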


In [48]:
class linearFit(object):
    def __init__(self, x, y, noise):
        self.x = x
        self.y = y
        self.noise = noise
        # bounds of the flat prior on both parameters
        self.upper = 5.0
        self.lower = -5.0

    def logPosterior(self, theta):
        # flat prior: zero probability (log = -inf) outside the box
        if np.any(theta < self.lower) or np.any(theta > self.upper):
            return -np.inf
        else:
            # Gaussian log-likelihood (up to an additive constant)
            model = theta[0] * self.x + theta[1]
            return -np.sum((self.y - model)**2 / (2.0 * self.noise**2))

    def sample(self):
        ndim, nwalkers = 2, 500
        # start all 500 walkers in a tight ball around the initial guess (a, b) = (1, 1)
        self.theta0 = np.asarray([1.0, 1.0])
        p0 = [self.theta0 + 0.01*np.random.randn(ndim) for i in range(nwalkers)]
        self.sampler = emcee.EnsembleSampler(nwalkers, ndim, self.logPosterior)
        # advance every walker for 100 steps
        self.sampler.run_mcmc(p0, 100)

In [49]:
out = linearFit(x, yNoise, sigma)
out.sample()
# discard the early part of the chain as burn-in and keep only the
# last 10 steps of every walker (500 walkers x 10 steps = 5000 samples)
aSamples = out.sampler.chain[:,-10:,0].flatten()
bSamples = out.sampler.chain[:,-10:,1].flatten()
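
A quick sanity check on the run is the mean acceptance fraction of the walkers, which is usually considered healthy when it lies roughly between 0.2 and 0.5. The cell below is a minimal sketch; it only relies on the acceptance_fraction attribute that the emcee sampler exposes.

In [ ]:
# mean fraction of proposed steps that were accepted, averaged over the walkers
print(np.mean(out.sampler.acceptance_fraction))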

In [50]:
f, ax = pl.subplots(ncols=2, nrows=2, figsize=(12,8))
ax[0,0].plot(aSamples)
ax[0,1].hist(aSamples)
ax[0,0].set_ylabel('a')
ax[0,1].set_xlabel('a')
ax[1,0].plot(bSamples)
ax[1,1].hist(bSamples)
ax[1,0].set_ylabel('b')
ax[1,1].set_xlabel('b')
pl.tight_layout()



In [51]:
pl.hexbin(aSamples,bSamples)


Out[51]:
<matplotlib.collections.PolyCollection at 0xaca7cd0>

In [52]:
print "mu_a={0} - sigma_a={1}".format(np.mean(aSamples),np.std(aSamples))
print "mu_b={0} - sigma_b={1}".format(np.mean(bSamples),np.std(bSamples))


mu_a=1.37715248386 - sigma_a=0.0934684138442
mu_b=0.279195321798 - sigma_b=0.162571291596
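
The hexbin plot above displays the joint distribution of the two parameters. To put a number on their correlation, and to quote intervals that do not assume Gaussian marginals, the samples can be used directly; the cell below is a minimal sketch working on the aSamples and bSamples arrays defined above.

In [ ]:
# correlation coefficient between the slope and intercept samples
print(np.corrcoef(aSamples, bSamples)[0, 1])
# 68% credible intervals from the 16th and 84th percentiles
print(np.percentile(aSamples, [16, 84]))
print(np.percentile(bSamples, [16, 84]))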
